{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用两个value模型估计action的价值,取其中较小的估计值来计算target,以缓解自举(bootstrapping)带来的价值高估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAR8AAAEXCAYAAACUBEAgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAdfUlEQVR4nO3de3DTdb7/8VfSNqG3pLSlCZXW1gXBDhe1YMl6dpi1XSrWC4KOehB7lB8qBpaLh7N0V3DW2Znyg5lVUC4eHYGds1pPncULC2p/Bcs6xgKVSrlVdwVagaRAbdIWmrbJ+/eH2+8SKEjapp+Gvh4zmdl+v59v+46a56bfb5LqRERARNTP9KoHIKLBifEhIiUYHyJSgvEhIiUYHyJSgvEhIiUYHyJSgvEhIiUYHyJSgvEhIiWUxWfdunXIyMjAkCFDkJOTgz179qgahYgUUBKfd999F0uWLMGLL76Ir776ChMmTEB+fj4aGhpUjENECuhUvLE0JycHkyZNwmuvvQYA8Pv9SEtLw4IFC7Bs2bKfPN7v9+PUqVOIj4+HTqcL9bhEdI1EBM3NzUhNTYVef/XnNpH9NJOmvb0dVVVVKCoq0rbp9Xrk5eXB4XB0e4zX64XX69W+PnnyJLKyskI+KxH1TH19PUaMGHHVNf0en7Nnz8Ln88FisQRst1gsOHr0aLfHFBcX4/e///1l2+vr62EymUIyJxEFz+PxIC0tDfHx8T+5tt/j0xNFRUVYsmSJ9nXXHTSZTIwP0QB0LadD+j0+ycnJiIiIgMvlCtjucrlgtVq7PcZoNMJoNPbHeETUT/r9apfBYEB2djbKy8u1bX6/H+Xl5bDZbP09DhEpouTXriVLlqCwsBATJ07EHXfcgVdeeQWtra148sknVYxDRAooic8jjzyCM2fOYMWKFXA6nbj11lvx8ccfX3YSmoiuX0pe59NbHo8HZrMZbrebJ5yJBpBgHpt8bxcRKcH4EJESjA8RKcH4EJESjA8RKcH4EJESjA8RKcH4EJESjA8RKcH4EJESjA8RKcH4EJESjA8RKcH4EJESjA8RKcH4EJESjA8RKcH4EJESjA8RKcH4EJESjA8RKcH4EJESjA8RKcH4EJESjA8RKcH4EJESjA8RKcH4EJESjA8RKcH4EJESjA8RKcH4EJESjA8RKcH4EJESjA8RKRF0fHbv3o377rsPqamp0Ol0eP/99wP2iwhWrFiB4cOHIzo6Gnl5efj2228D1jQ2NmLWrFkwmUxISEjAnDlz0NLS0qs7QkThJej4tLa2YsKECVi3bl23+1etWoW1a9di48aNqKysRGxsLPLz89HW1qatmTVrFg4dOoSysjJs27YNu3fvxtNPP93ze0FE4Ud6AYBs3bpV+9rv94vVapXVq1dr25qamsRoNMo777wjIiKHDx8WALJ3715tzY4dO0Sn08nJkyev6ee63W4BIG63uzfjE1EfC+ax2afnfI4dOwan04m8vDxtm9lsRk5ODhwOBwDA4XAgISEBEydO1Nbk5eVBr9ejsrKy2+/r9Xrh8XgCbkQU3vo0Pk6nEwBgsVgCtlssFm2f0+lESkpKwP7IyEgkJiZqay5VXFwMs9ms3dLS0vpybCJSICyudhUVFcHtdmu3+vp61SMRUS/1aXysVisAwOVyBWx3uVzaPqvVioaGhoD9nZ2daGxs1NZcymg0wmQyBdyIKLz1aXwyMzNhtVpRXl6ubfN4PKisrITNZgMA2Gw2NDU1oaqqSluzc+dO+P1+5OTk9OU4RDSARQZ7QEtLC/7+979rXx87dgzV1dVITExEeno6Fi1ahD/84Q8YNWoUMjMzsXz5cqSmpmL69OkAgFtuuQV333035s6di40bN6KjowPz58/Ho48+itTU1D67Y0Q0wAV7KW3Xrl0C4LJbYWGhiPx4uX358uVisVjEaDRKbm6u1NbWBnyPc+fOyWOPPSZxcXFiMpnkySeflObm5muegZfa
iQamYB6bOhERhe3rEY/HA7PZDLfbzfM/RANIMI/NsLjaRUTXH8aHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhIiaDiU1xcjEmTJiE+Ph4pKSmYPn06amtrA9a0tbXBbrcjKSkJcXFxmDlzJlwuV8Cauro6FBQUICYmBikpKVi6dCk6Ozt7f2+IKGwEFZ+KigrY7XZ8+eWXKCsrQ0dHB6ZOnYrW1lZtzeLFi/HRRx+htLQUFRUVOHXqFGbMmKHt9/l8KCgoQHt7O7744gts2bIFmzdvxooVK/ruXhHRwCe90NDQIACkoqJCRESampokKipKSktLtTVHjhwRAOJwOEREZPv27aLX68XpdGprNmzYICaTSbxe7zX9XLfbLQDE7Xb3Znwi6mPBPDZ7dc7H7XYDABITEwEAVVVV6OjoQF5enrZmzJgxSE9Ph8PhAAA4HA6MGzcOFotFW5Ofnw+Px4NDhw51+3O8Xi88Hk/AjYjCW4/j4/f7sWjRItx5550YO3YsAMDpdMJgMCAhISFgrcVigdPp1NZcHJ6u/V37ulNcXAyz2azd0tLSejo2EQ0QPY6P3W7HwYMHUVJS0pfzdKuoqAhut1u71dfXh/xnElFoRfbkoPnz52Pbtm3YvXs3RowYoW23Wq1ob29HU1NTwLMfl8sFq9WqrdmzZ0/A9+u6Gta15lJGoxFGo7EnoxLRABXUMx8Rwfz587F161bs3LkTmZmZAfuzs7MRFRWF8vJybVttbS3q6upgs9kAADabDTU1NWhoaNDWlJWVwWQyISsrqzf3hYjCSFDPfOx2O95++2188MEHiI+P187RmM1mREdHw2w2Y86cOViyZAkSExNhMpmwYMEC2Gw2TJ48GQAwdepUZGVlYfbs2Vi1ahWcTideeOEF2O12PrshGkyCuYwGoNvbpk2btDUXLlyQ5557ToYOHSoxMTHy4IMPyunTpwO+z/Hjx2XatGkSHR0tycnJ8vzzz0tHR8c1z8FL7UQDUzCPTZ2IiLr09YzH44HZbIbb7YbJZFI9DhH9UzCPTb63i4iUYHyISAnGh4iUYHyISAnGh4iUYHyISAnGh4iUYHyISAnGh4iU6NG72on6gvj98LW2wt/eDl1EBCKio6EzGKDT6VSPRv2A8aF+JyLoaGzEmR074N6zB+1nz0JvNCJm5Eik3HMP4sePhy4iQvWYFGKMD/UrEcH5775D/euvo7W2FvjnWwt9LS1wnzuHlpoapD7xBIbl5zNA1znGh/qNiKDl8GEcW7UKHT/8ABFBU3s7/tHcDLPBgJ/FxwPnz+Pkn/4Ew7BhME+cyF/BrmOMD/ULEUHrkSM4tnq1Fp661lYs378ftW43YiMj8X9uvhmPZGYC58/DtXUr4seORUR0tOrRKUR4tYtCTkTQfuYMvt+0CR2NjT9uA/B/a2pwuKkJPhF4Ojrw2pEjOPjDDwCA8999B9+FCwqnplBjfCikRAS+5mbU//d/o/WbbwL2eTo6Ar5u9/vh9fn6czxSiPGhkPK3taH+jTfg3rtXO7kMADoAv7RaEXnROZ2bTSbcGBcHAIgwGqHT8z/P6xnP+VDI+NraULdxIxorKi7bp9PpUDhyJOKjovD/Tp/G8OhozL35ZqQMGQIAGDplCiL5KZXXNcaHQsLf2YmTmzahcdeuK66J1OvxcEYGHsrIQNfzH51Oh+jMTFimT+czn+sc40N9Tvx+NO7ahXNXCU8XnU6Hiy+mR5rNGPHUUzD8809w0/WL/9dCfUpE4N67F99v2gR/W1tQx+qjo5H2zDOIHzcuRNPRQML4UJ8REXiqqnDi1Vfha2kJ7mC9HsPuvhtDbTb+
ujVI8N8y9QkRQUtNDY6/8go6PZ7gDtbrkXL//UidNYtvqRhEGB/qNRFBu8uF77dsCT48Oh2GTZuGGx5/HHqDITQD0oDE+FCvdXo8OLFuHc5/+21wB+p0GPqLX+CGxx+HLioqNMPRgMWrXdQrnc3NOPHqq2j++uugj429+WakzZmDiNjYEExGAx2f+VCPdTY348TatXDv2RP0scbhw5E+bx6ihg4NwWQUDvjMh3rE39GBU3/+M5oqK4M+1jh8OG5atgwxmZkhmIzCBZ/5UNDE58PZTz/FufLyoI+NSk5GxqJFiM7I6PvBKKwwPhQUEcEPn3+Ok3/6E/xeb1DH6ocMQfozzyB2zBh+SBgxPnTtusJzYv16+IP9rJ2ICFgefJCfTkgaxoeuiYig5eBBnHjttR6FZ/jDD8P68MN8ESFpGB/6SSKCtpMnUf/mm8GHR6+H5f77YX34YegjeX2D/oXxoZ/U0diIE6++igvHjgV3oE6HpNxcDH/kEej5IkK6BONDV9Xxww84/vLLaD1yJOhjY0ePxojCQkTExIRgMgp3jA9dka+1FcdfeQXNBw4EfWx0ZiYyFi9GRHx8CCaj60FQ8dmwYQPGjx8Pk8kEk8kEm82GHTt2aPvb2tpgt9uRlJSEuLg4zJw5Ey6XK+B71NXVoaCgADExMUhJScHSpUvR2dnZN/eG+oy/owOn33sPnh6EZ0h6Om76zW9gtFp5ZYuuKKj4jBgxAitXrkRVVRX27duHu+66Cw888AAOHToEAFi8eDE++ugjlJaWoqKiAqdOncKMGTO0430+HwoKCtDe3o4vvvgCW7ZswebNm7FixYq+vVfUK+L3o2HbNjR8+CEQ5F+TMFgsyPj1r2EcPpzhoavSiVz0JwV6IDExEatXr8ZDDz2EYcOG4e2338ZDDz0EADh69ChuueUWOBwOTJ48GTt27MC9996LU6dOwWKxAAA2btyI3/zmNzhz5gwMV/hIBa/XC+9FL2jzeDxIS0uD2+2GiR8y3qfE78fZsjLUv/EGpL09qGN1BgNu+q//gnnSJIZnkPJ4PDCbzdf02OzxOR+fz4eSkhK0trbCZrOhqqoKHR0dyMvL09aMGTMG6enpcDgcAACHw4Fx48Zp4QGA/Px8eDwe7dlTd4qLi2E2m7VbWlpaT8emq+gKz/dvvhl8eKKicMPs2TBnZzM8dE2Cjk9NTQ3i4uJgNBrx7LPPYuvWrcjKyoLT6YTBYEBCQkLAeovFAqfTCQBwOp0B4ena37XvSoqKiuB2u7VbfX19sGPTTxARNNfU4Pu33gr6bRO6yEjc8MQTSLn3Xr6IkK5Z0K/6Gj16NKqrq+F2u/Hee++hsLAQFd38Xaa+ZDQaYTQaQ/ozBjMRwYUTJ1D/+utBv4hQFxEB68MPMzwUtKDjYzAYMHLkSABAdnY29u7dizVr1uCRRx5Be3s7mpqaAp79uFwuWK1WAIDVasWeSz77petqWNca6l8igvazZ3FizRq0ff990Mcn/vKXsM6YAfBD3ylIvf4vxu/3w+v1Ijs7G1FRUSi/6GMWamtrUVdXB5vNBgCw2WyoqalBQ0ODtqasrAwmkwlZWVm9HYWCJCLoOHsWx1atwvl//CPo42PHjEHqv/87dAYDz/NQ0IJ65lNUVIRp06YhPT0dzc3NePvtt/HZZ5/hk08+gdlsxpw5c7BkyRIkJibCZDJhwYIFsNlsmDx5MgBg6tSpyMrKwuzZs7Fq1So4nU688MILsNvt/LVKAd/58zi+Zg1aa2uDPjZ29GjctHQpDMnJIZiMBoOg4tPQ0IAnnngCp0+fhtlsxvjx4/HJJ5/gV7/6FQDg5Zdfhl6vx8yZM+H1epGfn4/169drx0dERGDbtm2YN28ebDYbYmNjUVhYiJdeeqlv7xX9JF9bG079+c9oPngw6GNjfvYzZD7/PAzDhoVgMhosev06HxWCeS0BXc7f2QlnaSmc//u/kCBfRGgcMQI3/ed/Ijozk79q0WX65XU+FJ7E
50PDhx/CWVoadHj0RiPSnnqK4aE+wfgMIuL3o+HDD3Hqf/4HEuT76fRGI9KeeQYmvoiQ+gjjM0iICJoPHMDpd9/tWXjmzkVSbi7DQ32G8RkERAQXvvsOdevXw3f+fFDH6qKiMPzRRxke6nOMzyDg93pR/8Yb8F7lLSxXkviLX/DVyxQSjM8g4N63Dy1HjwZ9nOm223DDf/wH9HwNFoUA4zMINFZUAH5/UMfE33orMhYvRtQlbxQm6iuMz2AQ5CX12DFjkLFwIcNDIcX4DAZBnCiOzsjAjb/+NaISE0M4EBHjMygM/bd/u6YARcTG4oYnnsCQG27glS0KOcZnEEjIyUFCTs5VAxQRG4sb58/niwip3/BPSA4C+uhopM2dC/H74amqCnxbhU6H6BtvROrjj/PvqFO/YnwGAZ1Oh6jkZGQsXowmhwNNlZVoP3MGkfHxMN16KxKnTEFUUhLDQ/2K8RkkdDodImNjkZSbi6S77gK6PsxAr2d0SAnGZ5DR6XRBXf0iChWecCYiJRgfIlKC8SEiJRgfIlKC8SEiJRgfIlKC8SEiJRgfIlKC8SEiJRgfIlKC8SEiJRgfIlKC8SEiJRgfIlKC8SEiJRgfIlKC8SEiJRgfIlKC8SEiJRgfIlKiV/FZuXIldDodFi1apG1ra2uD3W5HUlIS4uLiMHPmTLhcroDj6urqUFBQgJiYGKSkpGDp0qXo7OzszShEFGZ6HJ+9e/fi9ddfx/jx4wO2L168GB999BFKS0tRUVGBU6dOYcaMGdp+n8+HgoICtLe344svvsCWLVuwefNmrFixouf3gojCj/RAc3OzjBo1SsrKymTKlCmycOFCERFpamqSqKgoKS0t1dYeOXJEAIjD4RARke3bt4terxen06mt2bBhg5hMJvF6vd3+vLa2NnG73dqtvr5eAIjb7e7J+EQUIm63+5ofmz165mO321FQUIC8vLyA7VVVVejo6AjYPmbMGKSnp8PhcAAAHA4Hxo0bB4vFoq3Jz8+Hx+PBoUOHuv15xcXFMJvN2i0tLa0nYxPRABJ0fEpKSvDVV1+huLj4sn1OpxMGgwEJCQkB2y0WC5xOp7bm4vB07e/a152ioiK43W7tVl9fH+zYRDTABPUXS+vr67Fw4UKUlZVhyJAhoZrpMkajEUajsd9+HhGFXlDPfKqqqtDQ0IDbb78dkZGRiIyMREVFBdauXYvIyEhYLBa0t7ejqakp4DiXywWr1QoAsFqtl1396vq6aw0RXf+Cik9ubi5qampQXV2t3SZOnIhZs2Zp/zsqKgrl5eXaMbW1tairq4PNZgMA2Gw21NTUoKGhQVtTVlYGk8mErKysPrpbRDTQBfVrV3x8PMaOHRuwLTY2FklJSdr2OXPmYMmSJUhMTITJZMKCBQtgs9kwefJkAMDUqVORlZWF2bNnY9WqVXA6nXjhhRdgt9v5qxXRIBJUfK7Fyy+/DL1ej5kzZ8Lr9SI/Px/r16/X9kdERGDbtm2YN28ebDYbYmNjUVhYiJdeeqmvRyGiAUwnIqJ6iGB5PB6YzWa43W6YTCbV4xDRPwXz2OR7u4hICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhICcaHiJRgfIhIiUjVA/SEiAAAPB6P4kmI6GJdj8mux+jVhGV8zp07BwBIS0tTPAkRdae5uRlms/mqa8IyPomJiQCAurq6n7yDA43H40FaWhrq6+thMplUj3PNOHf/Cte5RQTNzc1ITU39ybVhGR+9/sdTVWazOaz+xVzMZDKF5eycu3+F49zX+oSAJ5yJSAnGh4iUCMv4GI1GvPjiizAajapHCVq4zs65+1e4
zh0MnVzLNTEioj4Wls98iCj8MT5EpATjQ0RKMD5EpATjQ0RKhGV81q1bh4yMDAwZMgQ5OTnYs2eP0nl2796N++67D6mpqdDpdHj//fcD9osIVqxYgeHDhyM6Ohp5eXn49ttvA9Y0NjZi1qxZMJlMSEhIwJw5c9DS0hLSuYuLizFp0iTEx8cjJSUF06dPR21tbcCatrY22O12JCUlIS4uDjNnzoTL5QpYU1dXh4KCAsTExCAlJQVLly5FZ2dnyObesGEDxo8fr73612azYceOHQN65u6sXLkSOp0OixYtCrvZ+4SEmZKSEjEYDPLWW2/JoUOHZO7cuZKQkCAul0vZTNu3b5ff/e538pe//EUAyNatWwP2r1y5Usxms7z//vvy9ddfy/333y+ZmZly4cIFbc3dd98tEyZMkC+//FL+9re/yciRI+Wxxx4L6dz5+fmyadMmOXjwoFRXV8s999wj6enp0tLSoq159tlnJS0tTcrLy2Xfvn0yefJk+fnPf67t7+zslLFjx0peXp7s379ftm/fLsnJyVJUVBSyuT/88EP561//Kt98843U1tbKb3/7W4mKipKDBw8O2JkvtWfPHsnIyJDx48fLwoULte3hMHtfCbv43HHHHWK327WvfT6fpKamSnFxscKp/uXS+Pj9frFarbJ69WptW1NTkxiNRnnnnXdEROTw4cMCQPbu3aut2bFjh+h0Ojl58mS/zd7Q0CAApKKiQpszKipKSktLtTVHjhwRAOJwOETkx/Dq9XpxOp3amg0bNojJZBKv19tvsw8dOlTefPPNsJi5ublZRo0aJWVlZTJlyhQtPuEwe18Kq1+72tvbUVVVhby8PG2bXq9HXl4eHA6Hwsmu7NixY3A6nQEzm81m5OTkaDM7HA4kJCRg4sSJ2pq8vDzo9XpUVlb226xutxvAvz41oKqqCh0dHQGzjxkzBunp6QGzjxs3DhaLRVuTn58Pj8eDQ4cOhXxmn8+HkpIStLa2wmazhcXMdrsdBQUFATMC4fHPuy+F1bvaz549C5/PF/APHgAsFguOHj2qaKqrczqdANDtzF37nE4nUlJSAvZHRkYiMTFRWxNqfr8fixYtwp133omxY8dqcxkMBiQkJFx19u7uW9e+UKmpqYHNZkNbWxvi4uKwdetWZGVlobq6esDODAAlJSX46quvsHfv3sv2DeR/3qEQVvGh0LHb7Th48CA+//xz1aNck9GjR6O6uhputxvvvfceCgsLUVFRoXqsq6qvr8fChQtRVlaGIUOGqB5HubD6tSs5ORkRERGXnf13uVywWq2Kprq6rrmuNrPVakVDQ0PA/s7OTjQ2NvbL/Zo/fz62bduGXbt2YcSIEdp2q9WK9vZ2NDU1XXX27u5b175QMRgMGDlyJLKzs1FcXIwJEyZgzZo1A3rmqqoqNDQ04Pbbb0dkZCQiIyNRUVGBtWvXIjIyEhaLZcDOHgphFR+DwYDs7GyUl5dr2/x+P8rLy2Gz2RROdmWZmZmwWq0BM3s8HlRWVmoz22w2NDU1oaqqSluzc+dO+P1+5OTkhGw2EcH8+fOxdetW7Ny5E5mZmQH7s7OzERUVFTB7bW0t6urqAmavqakJiGdZWRlMJhOysrJCNvul/H4/vF7vgJ45NzcXNTU1qK6u1m4TJ07ErFmztP89UGcPCdVnvINVUlIiRqNRNm/eLIcPH5ann35aEhISAs7+97fm5mbZv3+/7N+/XwDIH//4R9m/f7+cOHFCRH681J6QkCAffPCBHDhwQB544IFuL7XfdtttUllZKZ9//rmMGjUq5Jfa582bJ2azWT777DM5ffq0djt//ry25tlnn5X09HTZuXOn7Nu3T2w2m9hsNm1/16XfqVOnSnV1tXz88ccybNiwkF76XbZsmVRUVMixY8fkwIEDsmzZMtHpdPLpp58O2Jmv5OKrXeE2e2+FXXxERF599VVJT08Xg8Egd9xxh3z55ZdK59m1a5cAuOxWWFgoIj9ebl++fLlY
LBYxGo2Sm5srtbW1Ad/j3Llz8thjj0lcXJyYTCZ58sknpbm5OaRzdzczANm0aZO25sKFC/Lcc8/J0KFDJSYmRh588EE5ffp0wPc5fvy4TJs2TaKjoyU5OVmef/556ejoCNncTz31lNx4441iMBhk2LBhkpubq4VnoM58JZfGJ5xm7y1+ng8RKRFW53yI6PrB+BCREowPESnB+BCREowPESnB+BCREowPESnB+BCREowPESnB+BCREowPESnx/wFP7z8YreQTDgAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 300x300 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import gym\n",
    "\n",
    "\n",
    "#Environment definition\n",
    "class MyWrapper(gym.Wrapper):\n",
    "    \"\"\"Pendulum-v1 wrapper with a simplified reset/step interface.\n",
    "\n",
    "    reset() returns only the observation, and step() returns\n",
    "    (state, reward, over) with the reward shifted and rescaled and the\n",
    "    episode capped at 200 steps.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        pendulum = gym.make('Pendulum-v1', render_mode='rgb_array')\n",
    "        super().__init__(pendulum)\n",
    "        self.env = pendulum\n",
    "        #Number of steps taken in the current episode\n",
    "        self.step_n = 0\n",
    "\n",
    "    def reset(self):\n",
    "        \"\"\"Start a new episode and return the initial observation.\"\"\"\n",
    "        self.step_n = 0\n",
    "        observation, _ = self.env.reset()\n",
    "        return observation\n",
    "\n",
    "    def step(self, action):\n",
    "        \"\"\"Apply `action` (scaled by 2) and return (state, reward, over).\"\"\"\n",
    "        #The environment receives a one-element list holding twice the given action\n",
    "        outcome = self.env.step([action * 2])\n",
    "        observation, raw_reward, terminated, truncated, _ = outcome\n",
    "\n",
    "        #Count the step; reaching 200 steps always ends the episode\n",
    "        self.step_n += 1\n",
    "        finished = True if self.step_n >= 200 else (terminated or truncated)\n",
    "\n",
    "        #Shift and rescale the reward to make training easier\n",
    "        return observation, (raw_reward + 8) / 8, finished\n",
    "\n",
    "    #Render the current frame\n",
    "    def show(self):\n",
    "        \"\"\"Draw the current rendered frame in a 3x3 inch matplotlib figure.\"\"\"\n",
    "        import matplotlib.pyplot as plt\n",
    "        plt.figure(figsize=(3, 3))\n",
    "        frame = self.env.render()\n",
    "        plt.imshow(frame)\n",
    "        plt.show()\n",
    "\n",
    "\n",
    "#Create the shared environment instance used by the rest of the notebook\n",
    "env = MyWrapper()\n",
    "\n",
    "#Start an episode so there is a frame to render\n",
    "env.reset()\n",
    "\n",
    "#Display the initial frame\n",
    "env.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-6.3993e-05],\n",
       "        [ 4.0095e-02]], grad_fn=<TanhBackward0>)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "\n",
    "class Model(torch.nn.Module):\n",
    "    \"\"\"Policy network: maps a 3-feature state to one Tanh-bounded action.\"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        #3 -> 64 -> 64 -> 1 MLP; the final Tanh bounds the output\n",
    "        layers = [\n",
    "            torch.nn.Linear(3, 64),\n",
    "            torch.nn.ReLU(),\n",
    "            torch.nn.Linear(64, 64),\n",
    "            torch.nn.ReLU(),\n",
    "            torch.nn.Linear(64, 1),\n",
    "            torch.nn.Tanh(),\n",
    "        ]\n",
    "        self.s = torch.nn.Sequential(*layers)\n",
    "\n",
    "    def forward(self, state):\n",
    "        \"\"\"Return the action for a batch of states of shape (batch, 3).\"\"\"\n",
    "        action = self.s(state)\n",
    "        return action\n",
    "\n",
    "\n",
    "#Online policy network, plus a delayed copy initialised with identical weights\n",
    "model_action = Model()\n",
    "model_action_delay = Model()\n",
    "model_action_delay.load_state_dict(model_action.state_dict())\n",
    "\n",
    "#Sanity check: forward a random batch of 2 states\n",
    "model_action(torch.randn(2, 3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([[0.0523],\n",
       "         [0.2258]], grad_fn=<AddmmBackward0>),\n",
       " tensor([[0.1537],\n",
       "         [0.0038]], grad_fn=<AddmmBackward0>))"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def _make_value_model():\n",
    "    \"\"\"Build one value network: 4 input features -> 1 unbounded value.\n",
    "\n",
    "    The 4 input features are the 3-feature state concatenated with the\n",
    "    1-feature action.\n",
    "    \"\"\"\n",
    "    return torch.nn.Sequential(\n",
    "        torch.nn.Linear(4, 64),\n",
    "        torch.nn.ReLU(),\n",
    "        torch.nn.Linear(64, 64),\n",
    "        torch.nn.ReLU(),\n",
    "        torch.nn.Linear(64, 1),\n",
    "    )\n",
    "\n",
    "\n",
    "#Two independently initialised value networks; each gets a delayed copy\n",
    "#that starts from identical weights\n",
    "model_value1 = _make_value_model()\n",
    "model_value1_delay = _make_value_model()\n",
    "model_value1_delay.load_state_dict(model_value1.state_dict())\n",
    "\n",
    "model_value2 = _make_value_model()\n",
    "model_value2_delay = _make_value_model()\n",
    "model_value2_delay.load_state_dict(model_value2.state_dict())\n",
    "\n",
    "#Sanity check: forward a random batch through both value networks\n",
    "model_value1(torch.randn(2, 4)), model_value2(torch.randn(2, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\appDir\\python3.10\\lib\\site-packages\\gym\\utils\\passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`.  (Deprecated NumPy 1.24)\n",
      "  if not isinstance(terminated, (bool, np.bool8)):\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "45.89648062332471"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from IPython import display\n",
    "import random\n",
    "\n",
    "\n",
    "#Play one episode and record the transitions\n",
    "def play(show=False):\n",
    "    \"\"\"Run one episode with the current policy plus exploration noise.\n",
    "\n",
    "    Args:\n",
    "        show: when True, redraw the environment after every step.\n",
    "\n",
    "    Returns:\n",
    "        (data, reward_sum): `data` is the list of\n",
    "        (state, action, reward, next_state, over) tuples collected during the\n",
    "        episode and `reward_sum` is the sum of the rewards returned by env.step.\n",
    "    \"\"\"\n",
    "    data = []\n",
    "    reward_sum = 0\n",
    "\n",
    "    state = env.reset()\n",
    "    over = False\n",
    "    while not over:\n",
    "        #Pure inference: no autograd graph is needed to pick an action\n",
    "        with torch.no_grad():\n",
    "            action = model_action(torch.FloatTensor(state).reshape(1, 3)).item()\n",
    "\n",
    "        #Add Gaussian noise to the action to encourage exploration\n",
    "        action += random.normalvariate(mu=0, sigma=0.2)\n",
    "\n",
    "        next_state, reward, over = env.step(action)\n",
    "\n",
    "        data.append((state, action, reward, next_state, over))\n",
    "        reward_sum += reward\n",
    "\n",
    "        state = next_state\n",
    "\n",
    "        if show:\n",
    "            display.clear_output(wait=True)\n",
    "            env.show()\n",
    "\n",
    "    return data, reward_sum\n",
    "\n",
    "\n",
    "#Smoke test: play one episode and display its total reward\n",
    "play()[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_17124\\1738991660.py:27: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at ..\\torch\\csrc\\utils\\tensor_new.cpp:248.)\n",
      "  state = torch.FloatTensor([i[0] for i in data]).reshape(-1, 3)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(torch.Size([64, 3]),\n",
       " 200,\n",
       " (array([0.66394275, 0.74778336, 0.4766118 ], dtype=float32),\n",
       "  0.2161717475451006,\n",
       "  0.9079437567690016,\n",
       "  array([0.62174135, 0.7832226 , 1.1023009 ], dtype=float32),\n",
       "  False))"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Replay buffer\n",
    "class Pool:\n",
    "    \"\"\"Replay buffer of (state, action, reward, next_state, over) transitions.\"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        self.pool = []\n",
    "\n",
    "    def __len__(self):\n",
    "        return len(self.pool)\n",
    "\n",
    "    def __getitem__(self, i):\n",
    "        return self.pool[i]\n",
    "\n",
    "    #Refresh the buffer with newly played episodes\n",
    "    def update(self):\n",
    "        \"\"\"Play episodes until at least 200 new transitions have been added.\n",
    "\n",
    "        Afterwards only the most recent 20000 transitions are kept.\n",
    "        \"\"\"\n",
    "        #Measure growth on this instance's own list, not on the global `pool`\n",
    "        old_len = len(self.pool)\n",
    "        while len(self.pool) - old_len < 200:\n",
    "            self.pool.extend(play()[0])\n",
    "\n",
    "        #Keep only the newest transitions\n",
    "        self.pool = self.pool[-2_0000:]\n",
    "\n",
    "    #Draw one training batch\n",
    "    def sample(self):\n",
    "        \"\"\"Return 64 transitions sampled without replacement, as tensors.\n",
    "\n",
    "        Returns:\n",
    "            state (64, 3) float, action (64, 1) float, reward (64, 1) float,\n",
    "            next_state (64, 3) float, over (64, 1) long.\n",
    "\n",
    "        Raises:\n",
    "            ValueError: if the buffer holds fewer than 64 transitions\n",
    "                (raised by random.sample).\n",
    "        \"\"\"\n",
    "        data = random.sample(self.pool, 64)\n",
    "\n",
    "        def stack_states(index):\n",
    "            #Convert each state separately and stack the results: building a\n",
    "            #FloatTensor straight from a list of numpy arrays is very slow\n",
    "            rows = [torch.as_tensor(i[index], dtype=torch.float32) for i in data]\n",
    "            return torch.stack(rows).reshape(-1, 3)\n",
    "\n",
    "        state = stack_states(0)\n",
    "        action = torch.FloatTensor([i[1] for i in data]).reshape(-1, 1)\n",
    "        reward = torch.FloatTensor([i[2] for i in data]).reshape(-1, 1)\n",
    "        next_state = stack_states(3)\n",
    "        over = torch.LongTensor([i[4] for i in data]).reshape(-1, 1)\n",
    "\n",
    "        return state, action, reward, next_state, over\n",
    "\n",
    "\n",
    "#Create the shared replay buffer and fill it with an initial batch of episodes\n",
    "pool = Pool()\n",
    "pool.update()\n",
    "#Draw one batch; these tensors are reused by the smoke tests in later cells\n",
    "state, action, reward, next_state, over = pool.sample()\n",
    "\n",
    "#Show the batch shape, the buffer size and the first stored transition\n",
    "next_state.shape, len(pool), pool[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#One Adam optimizer per trainable network; the value networks use a 10x larger learning rate than the policy\n",
    "optimizer_action = torch.optim.Adam(model_action.parameters(), lr=5e-4)\n",
    "optimizer_value1 = torch.optim.Adam(model_value1.parameters(), lr=5e-3)\n",
    "optimizer_value2 = torch.optim.Adam(model_value2.parameters(), lr=5e-3)\n",
    "\n",
    "\n",
    "def soft_update(_from, _to, tau=0.3):\n",
    "    \"\"\"Blend the parameters of `_from` into `_to` in place.\n",
    "\n",
    "    Each parameter of `_to` becomes (1 - tau) * _to + tau * _from.\n",
    "\n",
    "    Args:\n",
    "        _from: module providing the new parameter values.\n",
    "        _to: module whose parameters are overwritten with the blend.\n",
    "        tau: weight given to `_from`; the default 0.3 keeps the original\n",
    "            0.7 / 0.3 mix.\n",
    "    \"\"\"\n",
    "    #Distinct loop names: the original loop re-bound `_from` and `_to` themselves\n",
    "    with torch.no_grad():\n",
    "        for src, dst in zip(_from.parameters(), _to.parameters()):\n",
    "            dst.copy_(dst * (1 - tau) + src * tau)\n",
    "\n",
    "\n",
    "def requires_grad(model, value):\n",
    "    \"\"\"Set requires_grad to `value` on every parameter of `model`.\"\"\"\n",
    "    for weight in model.parameters():\n",
    "        weight.requires_grad = value\n",
    "\n",
    "\n",
    "#The delayed networks have no optimizer of their own, so their parameters need no gradients\n",
    "requires_grad(model_action_delay, False)\n",
    "requires_grad(model_value1_delay, False)\n",
    "requires_grad(model_value2_delay, False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-0.20000436902046204"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def train_action(state):\n",
    "    \"\"\"Run one gradient step on the action (policy) model.\n",
    "\n",
    "    The policy's own action for each state is scored by both value networks\n",
    "    and the policy is updated to maximise the smaller of the two scores.\n",
    "\n",
    "    Args:\n",
    "        state: float tensor of shape (batch, 3).\n",
    "\n",
    "    Returns:\n",
    "        The loss (negated mean of the element-wise minimum value) as a float.\n",
    "    \"\"\"\n",
    "    #Only the policy is trained here; the value networks act as fixed critics\n",
    "    requires_grad(model_action, True)\n",
    "    requires_grad(model_value1, False)\n",
    "    requires_grad(model_value2, False)\n",
    "\n",
    "    #Compute the action the current policy would take\n",
    "    action = model_action(state)\n",
    "\n",
    "    #Score the (state, action) pairs with both value networks; higher is better\n",
    "    state_action = torch.cat([state, action], dim=1)\n",
    "    value1 = model_value1(state_action)\n",
    "    value2 = model_value2(state_action)\n",
    "\n",
    "    #Use the smaller of the two estimates (the original compared value1 with itself)\n",
    "    loss = -torch.min(value1, value2).mean()\n",
    "\n",
    "    loss.backward()\n",
    "    optimizer_action.step()\n",
    "    optimizer_action.zero_grad()\n",
    "\n",
    "    return loss.item()\n",
    "\n",
    "\n",
    "#Smoke test: run one policy update on the batch sampled earlier and display the loss\n",
    "train_action(state)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.2325245887041092, 0.24681411683559418)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def train_value(state, action, reward, next_state, over):\n",
    "    \"\"\"Run one TD-learning step on both value networks.\n",
    "\n",
    "    Both networks regress onto the same target:\n",
    "    reward + 0.99 * (1 - over) * min(delayed value 1, delayed value 2),\n",
    "    evaluated at next_state with the delayed policy's action.\n",
    "\n",
    "    Returns:\n",
    "        (loss1, loss2): the two MSE losses as floats.\n",
    "    \"\"\"\n",
    "    #Only the value networks are trained in this step\n",
    "    for network, trainable in ((model_action, False), (model_value1, True),\n",
    "                               (model_value2, True)):\n",
    "        requires_grad(network, trainable)\n",
    "\n",
    "    #Current value estimates for the sampled (state, action) pairs\n",
    "    current_pairs = torch.cat([state, action], dim=1)\n",
    "    estimates = (model_value1(current_pairs), model_value2(current_pairs))\n",
    "\n",
    "    #Build the target from the delayed networks, taking the smaller value estimate\n",
    "    next_action = model_action_delay(next_state)\n",
    "    next_pairs = torch.cat([next_state, next_action], dim=1)\n",
    "    with torch.no_grad():\n",
    "        delayed_values = torch.min(model_value1_delay(next_pairs),\n",
    "                                   model_value2_delay(next_pairs))\n",
    "    target = delayed_values * 0.99 * (1 - over) + reward\n",
    "\n",
    "    #TD loss against the shared target, then one optimizer step per network\n",
    "    losses = [torch.nn.functional.mse_loss(estimate, target) for estimate in estimates]\n",
    "    for loss, optimizer in zip(losses, (optimizer_value1, optimizer_value2)):\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "        optimizer.zero_grad()\n",
    "\n",
    "    return losses[0].item(), losses[1].item()\n",
    "\n",
    "\n",
    "#Smoke test: run one value update on the batch sampled earlier and display both losses\n",
    "train_value(state, action, reward, next_state, over)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 400 19.072040866456167\n"
     ]
    }
   ],
   "source": [
    "#Training loop\n",
    "def train(epochs=10, updates_per_epoch=20, eval_every=20, eval_episodes=20):\n",
    "    \"\"\"Alternate between collecting data and updating the networks.\n",
    "\n",
    "    Args:\n",
    "        epochs: number of collect/update rounds.\n",
    "        updates_per_epoch: gradient updates (one sampled batch each) per round.\n",
    "        eval_every: report the average episode reward every this many epochs.\n",
    "        eval_episodes: number of episodes averaged for each report.\n",
    "    \"\"\"\n",
    "    model_action.train()\n",
    "    model_value1.train()\n",
    "    model_value2.train()\n",
    "\n",
    "    for epoch in range(epochs):\n",
    "        #Collect fresh transitions with the current policy\n",
    "        pool.update()\n",
    "\n",
    "        for _ in range(updates_per_epoch):\n",
    "            #Sample one batch and update the policy, then the value networks\n",
    "            state, action, reward, next_state, over = pool.sample()\n",
    "            train_action(state)\n",
    "            train_value(state, action, reward, next_state, over)\n",
    "\n",
    "        #Move the delayed networks towards the online ones once per epoch\n",
    "        soft_update(model_action, model_action_delay)\n",
    "        soft_update(model_value1, model_value1_delay)\n",
    "        soft_update(model_value2, model_value2_delay)\n",
    "\n",
    "        #Also report after the last epoch so short runs still show the final result\n",
    "        if epoch % eval_every == 0 or epoch == epochs - 1:\n",
    "            test_result = sum(play()[-1] for _ in range(eval_episodes)) / eval_episodes\n",
    "            print(epoch, len(pool), test_result)\n",
    "\n",
    "\n",
    "#Run the training loop\n",
    "train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAR8AAAEXCAYAAACUBEAgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAef0lEQVR4nO3dfXBU9b0/8PfZxzzuJoFkl5RE4oBg5KEVBLZ2xmmJRBofUOy0yNgM5epFAwPicEuq4tTpTPjhTK22Cp3prXg71XRwGh8oaHODhjosAQKp4SnoXCAZwiZAmt0Qkt3N7uf3B+TURcRdyO53N75fM2fGPd/PJp8D7Nuz53seNBEREBElmEF1A0T0zcTwISIlGD5EpATDh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJZSFz6uvvooJEyYgLS0Nc+bMwd69e1W1QkQKKAmfv/zlL1izZg2ef/55HDhwADNmzEB5eTm6u7tVtENECmgqLiydM2cO7rjjDvzud78DAITDYRQVFWHlypVYt27d174/HA6js7MT2dnZ0DQt3u0SUZREBH19fSgsLITBcO19G1OCetIFAgE0NzejurpaX2cwGFBWVga3233V9/j9fvj9fv316dOnUVpaGvdeiej6dHR0YPz48desSXj4nDt3DqFQCA6HI2K9w+HAsWPHrvqempoa/PKXv/zS+o6ODthstrj0SUSx8/l8KCoqQnZ29tfWJjx8rkd1dTXWrFmjvx7eQJvNxvAhSkLRHA5JePiMHTsWRqMRXV1dEeu7urrgdDqv+h6r1Qqr1ZqI9ogoQRI+22WxWDBz5kw0NDTo68LhMBoaGuByuRLdDhEpouRr15o1a1BZWYlZs2Zh9uzZ+M1vfoP+/n4sXbpURTtEpICS8Pnxj3+Ms2fPYv369fB4PPj2t7+NDz744EsHoYlo9FJyns+N8vl8sNvt8Hq9POBMlERi+Wzy2i4iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEiJmMNn165duO+++1BYWAhN0/DOO+9EjIsI1q9fj3HjxiE9PR1lZWX47LPPImp6enqwZMkS2Gw25OTkYNmyZbhw4cINbQgRpZaYw6e/vx8zZszAq6++etXxjRs34pVXXsHmzZvR1NSEzMxMlJeXY3BwUK9ZsmQJDh8+jPr6emzbtg27du3C448/fv1bQUSpR24AAKmrq9Nfh8NhcTqd8uKLL+rrent7xWq1yltvvSUiIkeOHBEAsm/fPr1mx44dommanD59Oqrf6/V6BYB4vd4baZ+IRlgsn80RPeZz4sQJeDwelJWV6evsdjvmzJkDt9sNAHC73cjJycGsWbP0mrKyMhgMBjQ1NV315/r9fvh8voiFiFLbiIaPx+MBADgcjoj1DodDH/N4PCgoKIgYN5lMyMvL02uuVFNTA7vdri9FRUUj2TYRKZASs13V1dXwer360tHRobolIrpBIxo+TqcTANDV1RWxvqurSx9zOp3o7u6OGB8aGkJPT49ecyWr1QqbzRaxEFFqG9HwKSkpgdPpRENDg77O5/OhqakJLpcLAOByudDb24vm5ma9ZufOnQiHw5gzZ85ItkNEScwU6xsuXLiAzz//XH994sQJtLS0IC8vD8XFxVi9ejV+9atfYdKkSSgpKcFzzz2HwsJCLFy4EABw66234p577sFjjz2GzZs3IxgMYsWKFfjJT36CwsLCEdswIkpysU6lffTRRwLgS0tlZaWIXJpuf+6558ThcIjVapV58+ZJW1tbxM84f/68LF68WLKyssRms8nSpUul
r68v6h441U6UnGL5bGoiIgqz77r4fD7Y7XZ4vV4e/yFKIrF8NlNitouIRh+GDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iUYPgQkRIMHyJSguFDREowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlYgqfmpoa3HHHHcjOzkZBQQEWLlyItra2iJrBwUFUVVVhzJgxyMrKwqJFi9DV1RVR097ejoqKCmRkZKCgoABr167F0NDQjW8NEaWMmMKnsbERVVVV2LNnD+rr6xEMBjF//nz09/frNU899RTef/99bN26FY2Njejs7MRDDz2kj4dCIVRUVCAQCGD37t144403sGXLFqxfv37ktoqIkp/cgO7ubgEgjY2NIiLS29srZrNZtm7dqtccPXpUAIjb7RYRke3bt4vBYBCPx6PXbNq0SWw2m/j9/qh+r9frFQDi9XpvpH0iGmGxfDZv6JiP1+sFAOTl5QEAmpubEQwGUVZWptdMmTIFxcXFcLvdAAC3241p06bB4XDoNeXl5fD5fDh8+PBVf4/f74fP54tYiCi1XXf4hMNhrF69GnfeeSemTp0KAPB4PLBYLMjJyYmodTgc8Hg8es0Xg2d4fHjsampqamC32/WlqKjoetsmoiRx3eFTVVWFQ4cOoba2diT7uarq6mp4vV596ejoiPvvJKL4Ml3Pm1asWIFt27Zh165dGD9+vL7e6XQiEAigt7c3Yu+nq6sLTqdTr9m7d2/EzxueDRuuuZLVaoXVar2eVokoScW05yMiWLFiBerq6rBz506UlJREjM+cORNmsxkNDQ36ura2NrS3t8PlcgEAXC4XWltb0d3drdfU19fDZrOhtLT0RraFiFJITHs+VVVVePPNN/Huu+8iOztbP0Zjt9uRnp4Ou92OZcuWYc2aNcjLy4PNZsPKlSvhcrkwd+5cAMD8+fNRWlqKRx99FBs3boTH48Gzzz6Lqqoq7t0QfZPEMo0G4KrL66+/rtcMDAzIk08+Kbm5uZKRkSEPPvignDlzJuLnnDx5UhYsWCDp6ekyduxYefrppyUYDEbdB6faiZJTLJ9NTUREXfRdH5/PB7vdDq/XC5vNprodIrosls8mr+0iIiUYPkSkBMOHiJRg+BCREgwfIlLius5wJrpew5OrFy5cQGdnJ/r7+zF58mRkZmYq7owSjeFDCSMiaG9vx3vvvYdTp04hNzcX+fn5KC4uZvh8AzF8KCGCwSC2bdsGt9uNe++9F0uXLkVmZiY0TVPdGinC8KG4CwaD+POf/wyfz4dnnnkGNpuNoUMMH4ovEUF9fT16e3uxfPlypKWl/XssHEaovx/hQACa0Qhjejo0i4XB9A3B8KG4ERF0dXWhsbER69at04NHRBDs6cHZHTvg3bsXgXPnYLBakTFxIgp++ENkT58OzWhU3D3FG8OH4mr79u0oKyvT7+8kIvB3duLkyy+jv60NuDz7FbpwAd7z53GhtRWFP/0p8svL9QAaniHjHtHowvN8KG4GBwdx/PhxzJ49Ww+O0MWLaP/979F/7BgkHMa//H7sP3cOn/l8CIsgdPEiTv/P/8B74IAeOn6/H36/X+WmUBwwfChuOjs7kZOTE3F1s3ffPvR9+umlaff+fqzauxdVe/bgP3fvRu2JEwiJIHzxIrrq6hAeHISIoK6uDk1NTUjBGzDQNTB8KG56e3uRn58f8XXpwpEjQDgMAfD/WltxpLcXIRH4gkH87uhRHPrXvwAAF//v/xC6eBEDAwP405/+hHfffVfRVlC8MHwoboaGhpCRkaG/lnAY4UBAf+0LBiPqA+Ew/KHQpRfhMIa8Xpw6dQperxcnTpzAvy4HE40OPOBMcWMymSKeZhvs7YV33z4AgAbg+04nPvf5MHT5
69QtNhtuysoCAIQDAfQfP45xc+fikUceweTJk5Genp7wbaD4YfhQ3OTm5uLs2bMQEWiadmkxXfonp2kaKidORLbZjP89cwbj0tPx2C23oGD4PKBwGMGeHoyx2eD1ejFp0iSGzyjD8KG4KSws1J8wa7fbYczMROakSfBefnSSyWDAjyZMwMMTJmD4qNAXjw+FLl5Eb08P+vr6MG7cOAVbQPHEYz4UN1arFbfccgv27t17ae/HbIb5iqfZapoGw/Be0RXn8QycPImm3btx2223wWw2J7BzSgSGD8XVggUL9MsrNE2DFmWIiAg6Dx9GQ3095s+fzxMMRyGGD8WNpmlwOBz4/ve/jy1btmBgYABZt90GGL7+n10gHMZ7J0+i7O67kZ+fn4BuKdEYPhRXmqbh7rvvRm5uLjZv3gz/19xGQ0TQFwxi68mTyM/IgGviRO71jFIMH4o7o9GIJUuW4Oabb8bG115D89mzuDg0FHHGsojg4tAQ9p8/j/8+fhzFmZn4QW4u/CdPqmuc4oqzXRR3mqbBbDbj/vvvx4xbb8UfV65EY1sbbGYzss1mCIC+YBB9wSC+lZGBxTffjIK0NGiaBrkcUtz7GX0YPpQwmqah+Oab8ajLBY/RiHODg7h4+YzmDKMRY9PSkG40RgRNoLv70pXvDJ9Rh+FDCaUZjTCkpSHDZELx5bOZr6Xv008hoRC0KA5SU2rh3ygllKZpsDocUdeH/X79nj80ujB8KOEyb7016tpwIIChvr44dkOqMHwo4cy5uVHXDvl8GOCM16jE8KGEM1gsMFitUdVKMIhQfz9vJDYKMXwo4Sz5+bDGcKEov3aNTgwfSjhjRgaMUcx0Detva4tjN6QKw4cST9NgsFiiLh/s6OCM1yjE8CElsqdNi7pWQiHIFbdcpdTH8CElrIWFUdcO+XwI9vTEsRtSIabw2bRpE6ZPnw6bzQabzQaXy4UdO3bo44ODg6iqqsKYMWOQlZWFRYsWoaurK+JntLe3o6KiAhkZGSgoKMDatWsxNDQ0MltDKUHTNBi/8NjkrxPs6YHf44ljR6RCTOEzfvx4bNiwAc3Nzdi/fz9+8IMf4IEHHsDhw4cBAE899RTef/99bN26FY2Njejs7MRDDz2kvz8UCqGiogKBQAC7d+/GG2+8gS1btmD9+vUju1WU9Cz5+TBdcVfDa5ErroKn1KfJDf6N5uXl4cUXX8TDDz+M/Px8vPnmm3j44YcBAMeOHcOtt94Kt9uNuXPnYseOHbj33nvR2dkJx+VT7Ddv3oyf//znOHv2LCxfcRDyyidW+nw+FBUVwev1RjyQjlJH6OJFHFu79tLB5CiM/4//QMF99/Hq9iQ3fL/uaD6b133MJxQKoba2Fv39/XC5XGhubkYwGERZWZleM2XKFBQXF8PtdgMA3G43pk2bpgcPAJSXl8Pn8+l7T1dTU1MDu92uL0VFRdfbNiUJQ1paTDNevpaW+DVDSsQcPq2trcjKyoLVasXy5ctRV1eH0tJSeDweWCwW5FyxK+1wOOC5/H3d4/FEBM/w+PDYV6murobX69WXjij/b0nJLZYTDeULDxuk0SHmW2pMnjwZLS0t8Hq9ePvtt1FZWYnGxsZ49KazWq2wRnk6PqUITUPGpEn41yefRFUe6u9HeGAAxi88AZVSW8x7PhaLBRMnTsTMmTNRU1ODGTNm4OWXX4bT6UQgEEBvb29EfVdXF5xOJwDA6XR+afZr+PVwDX1zxHKB6eDp0wicPRvHbijRbvg8n3A4DL/fj5kzZ8JsNqOhoUEfa2trQ3t7O1wuFwDA5XKhtbUV3d3dek19fT1sNhtKS0tvtBVKIZqmQTMao64P+/0Rz3mn1BfT167q6mosWLAAxcXF6Ovrw5tvvomPP/4YH374Iex2O5YtW4Y1a9YgLy8PNpsNK1euhMvlwty5cwEA8+fPR2lpKR599FFs3LgRHo8Hzz77LKqqqvi16hsovaQEJrsdQ17v1xeLRFdHKSOm8Onu
7sZPf/pTnDlzBna7HdOnT8eHH36Iu+++GwDw0ksvwWAwYNGiRfD7/SgvL8drr72mv99oNGLbtm144okn4HK5kJmZicrKSrzwwgsju1WUEixjxsCQng5EGSoXjh6FfdasOHdFiXLD5/moEMu5BJS8woEAjqxcCf+ZM1HV537ve7j5v/4rzl3RjUjIeT5EN0ozGpFeUhJ1fdjvh/BSnFGD4UPqGAzImDgx6nJ/dzeG+vvj2BAlEsOHlIrlAlP/6dMI8a6GowbDh5TRNA2mnBxopujnPcL82jVqMHxIqcxJky7NeEVBwuGoD05T8mP4kFLGzMzoTzYMh9HX2hrfhihhGD6klGYwxHTcR4JB3tdnlGD4kFIGqxVZU6dGXR/0enk/51GC4UNqGQwxXWB68fhxhDjdPiowfEgpTdNimu0KDQxAwuE4dkSJwvAh5bJKS6FFe1fDcJgXmI4SDB9SzupwRD3jFQ4G0f/ZZ3HuiBKB4UPKaWZz9MXhMILnzsWvGUoYhg8pZ0xLQ1oMDwXgcZ/RgeFDyhnS0pA2fnzU9QOnTnG6fRRg+JBymtEIYwx3shw4dYrXeI0CDB9KCpb8fCDaBwKKcM9nFGD4UFLILC2FZojun2M4EID/iqegUOph+FBSMNtsUe/5hAcGcPHzz+PcEcUbw4eSgmYywRDDBabgxaUpj+FDScGUkxPT/Zwp9TF8KCkYrFZkT50a/UHnKI8PUfLi3yAlBU3T4Fi4EJlTpnxtrSknB7Zvfzv+TVFcMXwoaRjS0lD4yCMw2e1fWaNZLHAuWgSr05nAzigeGD6UNDRNQ/a0aShavhyWgoIvjRszMzHuRz9C/j33RD0tT8krpsclE8WbZjAg97vfRcZNN6Fn1y5caGuDBINInzABed/7HjInT47+ns+U1Bg+lHQ0TYP1W9/CuMWLrzpGowPDh5ISQ2b04xdnIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iUuKHw2bBhAzRNw+rVq/V1g4ODqKqqwpgxY5CVlYVFixah64q7zrW3t6OiogIZGRkoKCjA2rVrMcR78hJ9o1x3+Ozbtw+///3vMX369Ij1Tz31FN5//31s3boVjY2N6OzsxEMPPaSPh0IhVFRUIBAIYPfu3XjjjTewZcsWrF+//vq3gohSj1yHvr4+mTRpktTX18tdd90lq1atEhGR3t5eMZvNsnXrVr326NGjAkDcbreIiGzfvl0MBoN4PB69ZtOmTWKz2cTv91/19w0ODorX69WXjo4OASBer/d62ieiOPF6vVF/Nq9rz6eqqgoVFRUoKyuLWN/c3IxgMBixfsqUKSguLobb7QYAuN1uTJs2DQ6HQ68pLy+Hz+fD4cOHr/r7ampqYLfb9aUohgfMEVFyijl8amtrceDAAdTU1HxpzOPxwGKxICcnJ2K9w+GAx+PRa74YPMPjw2NXU11dDa/Xqy8dHR2xtk1ESSamC0s7OjqwatUq1NfXIy2Wm33fIKvVCmsMD5UjouQX055Pc3Mzuru7cfvtt8NkMsFkMqGxsRGvvPIKTCYTHA4HAoEAent7I97X1dUF5+U7zzmdzi/Nfg2/dvLudETfGDGFz7x589Da2oqWlhZ9mTVrFpYsWaL/t9lsRkNDg/6etrY2tLe3w+VyAQBcLhdaW1vR3d2t19TX18Nms6G0tHSENouIkl1MX7uys7MxderUiHWZmZkYM2aMvn7ZsmVYs2YN8vLyYLPZsHLlSrhcLsydOxcAMH/+fJSWluLRRx/Fxo0b4fF48Oyzz6KqqopfrYi+QUb8ZmIvvfQSDAYDFi1aBL/fj/Lycrz22mv6uNFoxLZt2/DEE0/A5XIhMzMTlZWVeOGFF0a6FSJKYppI6j360efzwW63w+v1wmazqW6HiC6L5bPJa7uISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iUYPgQkRIMHyJSguFD
REowfIhICYYPESnB8CEiJRg+RKQEw4eIlGD4EJESDB8iUoLhQ0RKMHyISAmGDxEpwfAhIiUYPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iUMKlu4HqICADA5/Mp7oSIvmj4Mzn8Gb2WlAyf8+fPAwCKiooUd0JEV9PX1we73X7NmpQMn7y8PABAe3v7125gsvH5fCgqKkJHRwdsNpvqdqLGvhMrVfsWEfT19aGwsPBra1MyfAyGS4eq7HZ7Sv3FfJHNZkvJ3tl3YqVi39HuEPCAMxEpwfAhIiVSMnysViuef/55WK1W1a3ELFV7Z9+Jlap9x0KTaObEiIhGWEru+RBR6mP4EJESDB8iUoLhQ0RKMHyISImUDJ9XX30VEyZMQFpaGubMmYO9e/cq7WfXrl247777UFhYCE3T8M4770SMiwjWr1+PcePGIT09HWVlZfjss88ianp6erBkyRLYbDbk5ORg2bJluHDhQlz7rqmpwR133IHs7GwUFBRg4cKFaGtri6gZHBxEVVUVxowZg6ysLCxatAhdXV0RNe3t7aioqEBGRgYKCgqwdu1aDA0Nxa3vTZs2Yfr06frZvy6XCzt27Ejqnq9mw4YN0DQNq1evTrneR4SkmNraWrFYLPLHP/5RDh8+LI899pjk5ORIV1eXsp62b98uzzzzjPz1r38VAFJXVxcxvmHDBrHb7fLOO+/IP//5T7n//vulpKREBgYG9Jp77rlHZsyYIXv27JF//OMfMnHiRFm8eHFc+y4vL5fXX39dDh06JC0tLfLDH/5QiouL5cKFC3rN8uXLpaioSBoaGmT//v0yd+5c+e53v6uPDw0NydSpU6WsrEwOHjwo27dvl7Fjx0p1dXXc+n7vvffkb3/7mxw/flza2trkF7/4hZjNZjl06FDS9nylvXv3yoQJE2T69OmyatUqfX0q9D5SUi58Zs+eLVVVVfrrUCgkhYWFUlNTo7Crf7syfMLhsDidTnnxxRf1db29vWK1WuWtt94SEZEjR44IANm3b59es2PHDtE0TU6fPp2w3ru7uwWANDY26n2azWbZunWrXnP06FEBIG63W0QuBa/BYBCPx6PXbNq0SWw2m/j9/oT1npubK3/4wx9Soue+vj6ZNGmS1NfXy1133aWHTyr0PpJS6mtXIBBAc3MzysrK9HUGgwFlZWVwu90KO/tqJ06cgMfjiejZbrdjzpw5es9utxs5OTmYNWuWXlNWVgaDwYCmpqaE9er1egH8+64Bzc3NCAaDEb1PmTIFxcXFEb1PmzYNDodDrykvL4fP58Phw4fj3nMoFEJtbS36+/vhcrlSoueqqipUVFRE9Aikxp/3SEqpq9rPnTuHUCgU8QcPAA6HA8eOHVPU1bV5PB4AuGrPw2MejwcFBQUR4yaTCXl5eXpNvIXDYaxevRp33nknpk6dqvdlsViQk5Nzzd6vtm3DY/HS2toKl8uFwcFBZGVloa6uDqWlpWhpaUnangGgtrYWBw4cwL59+740lsx/3vGQUuFD8VNVVYVDhw7hk08+Ud1KVCZPnoyWlhZ4vV68/fbbqKysRGNjo+q2rqmjowOrVq1CfX090tLSVLejXEp97Ro7diyMRuOXjv53dXXB6XQq6urahvu6Vs9OpxPd3d0R40NDQ+jp6UnIdq1YsQLbtm3DRx99hPHjx+vrnU4nAoEAent7r9n71bZteCxeLBYLJk6ciJkzZ6KmpgYzZszAyy+/nNQ9Nzc3o7u7G7fffjtMJhNMJhMaGxvxyiuvwGQyweFwJG3v8ZBS4WOxWDBz5kw0NDTo68LhMBoaGuByuRR29tVKSkrgdDojevb5fGhqatJ7drlc6O3tRXNzs16zc+dOhMNhzJkzJ269iQhWrFiBuro67Ny5EyUlJRHjM2fOhNlsjui9ra0N7e3tEb23trZGhGd9fT1sNhtKS0vj1vuVwuEw/H5/Uvc8b948tLa2oqWlRV9mzZqF
JUuW6P+drL3Hheoj3rGqra0Vq9UqW7ZskSNHjsjjjz8uOTk5EUf/E62vr08OHjwoBw8eFADy61//Wg4ePCinTp0SkUtT7Tk5OfLuu+/Kp59+Kg888MBVp9q/853vSFNTk3zyyScyadKkuE+1P/HEE2K32+Xjjz+WM2fO6MvFixf1muXLl0txcbHs3LlT9u/fLy6XS1wulz4+PPU7f/58aWlpkQ8++EDy8/PjOvW7bt06aWxslBMnTsinn34q69atE03T5O9//3vS9vxVvjjblWq936iUCx8Rkd/+9rdSXFwsFotFZs+eLXv27FHaz0cffSQAvrRUVlaKyKXp9ueee04cDodYrVaZN2+etLW1RfyM8+fPy+LFiyUrK0tsNpssXbpU+vr64tr31XoGIK+//rpeMzAwIE8++aTk5uZKRkaGPPjgg3LmzJmIn3Py5ElZsGCBpKeny9ixY+Xpp5+WYDAYt75/9rOfyU033SQWi0Xy8/Nl3rx5evAka89f5crwSaXebxTv50NESqTUMR8iGj0YPkSkBMOHiJRg+BCREgwfIlKC4UNESjB8iEgJhg8RKcHwISIlGD5EpATDh4iU+P9wPqwU9+e7ZQAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 300x300 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "-19.613296377780664"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Play one episode with rendering enabled and display its total reward\n",
    "play(True)[-1]"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "第7章-DQN算法.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
